library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.8
## ✓ tidyr 1.2.0 ✓ stringr 1.4.0
## ✓ readr 2.1.2 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
#install.packages("gganimate")
#library(gganimate)
library("ggpubr")
library(ggsci)
setwd("~/OneDrive\ -\ Michigan\ State\ University/Manning_lab/Mastitis_project/GitHub/data/assembly")
#merging 2 files same ID --> first column on both files
metaquast_ref =read.table("metaquast_reference.csv", sep = ",", header = TRUE, na.strings = "",check.names = F)
metaquast_noalign =read.table("metaquast_not_aligned.csv", sep = ",", header = TRUE, na.strings = "",check.names = F)
metaquast_combref =read.table("metaquast_combined_reference.csv", sep = ",", header = TRUE, na.strings = "",check.names = F)
#STATISTICS #normality tests
shapiro.test(metaquast_ref$`N50 (Kbp)`) #significant p-value = non normal
##
## Shapiro-Wilk normality test
##
## data: metaquast_ref$`N50 (Kbp)`
## W = 0.80995, p-value < 2.2e-16
ggqqplot(metaquast_ref$`N50 (Kbp)`) #Q-Q plot (or quantile-quantile plot) draws the correlation between a given sample and the normal distribution. A 45-degree reference line is also plotted.
#http://www.sthda.com/english/wiki/normality-test-in-r
ggdensity(metaquast_ref$`N50 (Kbp)`)
#if it is not normal use nonparametric methods = Wilcoxon
total = metaquast_combref %>%
select(assembler, `N50 (Kbp)`)
wilcox.test(data = total, `N50 (Kbp)` ~ assembler, alternative = c("two.sided", "less", "greater"), paired = F, conf.level = 0.95)
##
## Wilcoxon rank sum test with continuity correction
##
## data: N50 (Kbp) by assembler
## W = 11877, p-value = 0.3037
## alternative hypothesis: true location shift is not equal to 0
#the total CFU/g is greather in Group A in block 5. The other blocks had similar number of CFU between groups. No significant fdifference overall
#REFERENCE
N50_ref = metaquast_ref %>%
ggplot(aes(y = `N50 (Kbp)`, x = assembler, fill = assembler)) +
geom_boxplot(outlier.shape = NA) +
geom_point(alpha=0.2, position = position_jitterdodge(), size = 2.5) +
labs(x="Assembler",fill = "Assembler") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme_classic() +
theme(axis.text = element_text(colour = "black"), strip.text = element_text(colour = "black"),legend.position = "none") +
stat_compare_means(label.y = 6, label.x.npc = .3) +
scale_fill_aaas()
N50_ref
L75_ref = metaquast_ref %>%
ggplot(aes(y = `L75 (K)`, x = assembler, fill = assembler)) +
geom_boxplot(outlier.shape = NA) +
geom_point(alpha=0.2, position = position_jitterdodge(), size = 2.5) +
labs(x="Assembler",fill = "Assembler") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme_classic() +
theme(axis.text = element_text(colour = "black"), strip.text = element_text(colour = "black"),legend.position = "none") +
stat_compare_means(label.y = 1100, label.x.npc = .3) +
scale_fill_aaas()
L75_ref
largest_ref = metaquast_ref %>%
ggplot(aes(y = `Largest contig (Kbp)`, x = assembler, fill = assembler)) +
geom_boxplot(outlier.shape = NA) +
geom_point(alpha=0.2, position = position_jitterdodge(), size = 2.5) +
labs(x="Assembler",fill = "Assembler") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme_classic() +
theme(axis.text = element_text(colour = "black"), strip.text = element_text(colour = "black"),legend.position = "none") +
stat_compare_means(label.y = 6, label.x.npc = .3) +
scale_fill_aaas()
largest_ref
length_ref = metaquast_ref %>%
ggplot(aes(y = `Length (Mbp)`, x = assembler, fill = assembler)) +
geom_boxplot(outlier.shape = NA) +
geom_point(alpha=0.2, position = position_jitterdodge(), size = 2.5) +
labs(x="Assembler",fill = "Assembler") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme_classic() +
theme(axis.text = element_text(colour = "black"), strip.text = element_text(colour = "black"),legend.position = "none") +
stat_compare_means(label.y = 2.8, label.x.npc = .3) +
scale_fill_aaas()
length_ref
mismatch_ref = metaquast_ref %>%
ggplot(aes(y = `Mismatches/100kbp`, x = assembler, fill = assembler)) +
geom_boxplot(outlier.shape = NA) +
geom_point(alpha=0.2, position = position_jitterdodge(), size = 2.5) +
labs(x="Assembler",fill = "Assembler") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme_classic() +
theme(axis.text = element_text(colour = "black"), strip.text = element_text(colour = "black"),legend.position = "none") +
stat_compare_means(label.y = 7800, label.x.npc = .3) +
scale_fill_aaas()
mismatch_ref
misassem_ref = metaquast_ref %>%
ggplot(aes(y = Misassemblies, x = assembler, fill = assembler)) +
geom_boxplot(outlier.shape = NA) +
geom_point(alpha=0.2, position = position_jitterdodge(), size = 2.5) +
labs(x="Assembler",fill = "Assembler") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme_classic() +
theme(axis.text = element_text(colour = "black"), strip.text = element_text(colour = "black"),legend.position = "none") +
stat_compare_means(label.y = 75, label.x.npc = .3) +
scale_fill_aaas()
misassem_ref
indels_ref = metaquast_ref %>%
ggplot(aes(y = `Indels/100kbp`, x = assembler, fill = assembler)) +
geom_boxplot(outlier.shape = NA) +
geom_point(alpha=0.2, position = position_jitterdodge(), size = 2.5) +
labs(x="Assembler",fill = "Assembler") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme_classic() +
theme(axis.text = element_text(colour = "black"), strip.text = element_text(colour = "black"),legend.position = "none") +
stat_compare_means(label.y = 600, label.x.npc = .3) +
scale_fill_aaas()
indels_ref
genofra_ref = metaquast_ref %>%
ggplot(aes(y = `Genome Fraction`, x = assembler, fill = assembler)) +
geom_boxplot(outlier.shape = NA) +
geom_point(alpha=0.2, position = position_jitterdodge(), size = 2.5) +
labs(x="Assembler",fill = "Assembler") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme_classic() +
theme(axis.text = element_text(colour = "black"), strip.text = element_text(colour = "black"),legend.position = "none") +
stat_compare_means(label.y = .5, label.x.npc = .3) +
scale_fill_aaas()
genofra_ref
#COMBINED REFERENCE
N50_comb = metaquast_combref %>%
ggplot(aes(y = `N50 (Kbp)`, x = assembler, fill = assembler)) +
geom_boxplot(outlier.shape = NA) +
geom_point(alpha=0.2, position = position_jitterdodge(), size = 2.5) +
labs(x="Assembler",fill = "Assembler") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme_classic() +
theme(axis.text = element_text(colour = "black"), strip.text = element_text(colour = "black"),legend.position = "none") +
stat_compare_means(label.y = 1.05, label.x.npc = .3) +
scale_fill_aaas()
N50_comb
L75_comb = metaquast_combref %>%
ggplot(aes(y = `L75 (K)`, x = assembler, fill = assembler)) +
geom_boxplot(outlier.shape = NA) +
geom_point(alpha=0.2, position = position_jitterdodge(), size = 2.5) +
labs(x="Assembler",fill = "Assembler") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme_classic() +
theme(axis.text = element_text(colour = "black"), strip.text = element_text(colour = "black"),legend.position = "none") +
stat_compare_means(label.y = 23000, label.x.npc = .3) +
scale_fill_aaas()
L75_comb
largest_comb = metaquast_combref %>%
ggplot(aes(y = `Largest contig (Kbp)`, x = assembler, fill = assembler)) +
geom_boxplot(outlier.shape = NA) +
geom_point(alpha=0.2, position = position_jitterdodge(), size = 2.5) +
labs(x="Assembler",fill = "Assembler") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme_classic() +
theme(axis.text = element_text(colour = "black"), strip.text = element_text(colour = "black"),legend.position = "none") +
stat_compare_means(label.y = 125, label.x.npc = .3) +
scale_fill_aaas()
largest_comb
length_comb = metaquast_combref %>%
ggplot(aes(y = `Length (Mbp)`, x = assembler, fill = assembler)) +
geom_boxplot(outlier.shape = NA) +
geom_point(alpha=0.2, position = position_jitterdodge(), size = 2.5) +
labs(x="Assembler",fill = "Assembler") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme_classic() +
theme(axis.text = element_text(colour = "black"), strip.text = element_text(colour = "black"),legend.position = "none") +
stat_compare_means(label.y = 30, label.x.npc = .3) +
scale_fill_aaas()
length_comb
mismatch_comb = metaquast_combref %>%
ggplot(aes(y = `Mismatches/100kbp`, x = assembler, fill = assembler)) +
geom_boxplot(outlier.shape = NA) +
geom_point(alpha=0.2, position = position_jitterdodge(), size = 2.5) +
labs(x="Assembler",fill = "Assembler") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme_classic() +
theme(axis.text = element_text(colour = "black"), strip.text = element_text(colour = "black"),legend.position = "none") +
stat_compare_means(label.y = 7800, label.x.npc = .3) +
scale_fill_aaas()
mismatch_comb
misassem_comb = metaquast_combref %>%
ggplot(aes(y = Misassemblies, x = assembler, fill = assembler)) +
geom_boxplot(outlier.shape = NA) +
geom_point(alpha=0.2, position = position_jitterdodge(), size = 2.5) +
labs(x="Assembler",fill = "Assembler") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme_classic() +
theme(axis.text = element_text(colour = "black"), strip.text = element_text(colour = "black"),legend.position = "none") +
stat_compare_means(label.y = 150, label.x.npc = .3) +
scale_fill_aaas()
misassem_comb
indels_comb = metaquast_combref %>%
ggplot(aes(y = `Indels/100kbp`, x = assembler, fill = assembler)) +
geom_boxplot(outlier.shape = NA) +
geom_point(alpha=0.2, position = position_jitterdodge(), size = 2.5) +
labs(x="Assembler",fill = "Assembler") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme_classic() +
theme(axis.text = element_text(colour = "black"), strip.text = element_text(colour = "black"),legend.position = "none") +
stat_compare_means(label.y = 120, label.x.npc = .3) +
scale_fill_aaas()
indels_comb
genofra_comb = metaquast_combref %>%
ggplot(aes(y = `Genome Fraction`, x = assembler, fill = assembler)) +
geom_boxplot(outlier.shape = NA) +
geom_point(alpha=0.2, position = position_jitterdodge(), size = 2.5) +
labs(x="Assembler",fill = "Assembler") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme_classic() +
theme(axis.text = element_text(colour = "black"), strip.text = element_text(colour = "black"),legend.position = "none") +
stat_compare_means(label.y = .075, label.x.npc = .3) +
scale_fill_aaas()
genofra_comb
#NOT ALIGNED
N50_na = metaquast_noalign %>%
ggplot(aes(y = `N50 (Kbp)`, x = assembler, fill = assembler)) +
geom_boxplot(outlier.shape = NA) +
geom_point(alpha=0.2, position = position_jitterdodge(), size = 2.5) +
labs(x="Assembler",fill = "Assembler") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme_classic() +
theme(axis.text = element_text(colour = "black"), strip.text = element_text(colour = "black"),legend.position = "none") +
stat_compare_means(label.y = .95, label.x.npc = .3) +
scale_fill_aaas()
N50_na
L50_na = metaquast_noalign %>%
ggplot(aes(y = `L50 (K)`, x = assembler, fill = assembler)) +
geom_boxplot(outlier.shape = NA) +
geom_point(alpha=0.2, position = position_jitterdodge(), size = 2.5) +
labs(x="Assembler",fill = "Assembler") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme_classic() +
theme(axis.text = element_text(colour = "black"), strip.text = element_text(colour = "black"),legend.position = "none") +
stat_compare_means(label.y = 12, label.x.npc = .3) +
scale_fill_aaas()
L50_na
L75_na = metaquast_noalign %>%
ggplot(aes(y = `L75 (K)`, x = assembler, fill = assembler)) +
geom_boxplot(outlier.shape = NA) +
geom_point(alpha=0.2, position = position_jitterdodge(), size = 2.5) +
labs(x="Assembler",fill = "Assembler") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme_classic() +
theme(axis.text = element_text(colour = "black"), strip.text = element_text(colour = "black"),legend.position = "none") +
stat_compare_means(label.y = 23000, label.x.npc = .3) +
scale_fill_aaas()
L75_na
L75_na = metaquast_noalign %>%
ggplot(aes(y = `L75 (K)`, x = assembler, fill = assembler)) +
geom_boxplot(outlier.shape = NA) +
geom_point(alpha=0.2, position = position_jitterdodge(), size = 2.5) +
labs(x="Assembler",fill = "Assembler") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme_classic() +
theme(axis.text = element_text(colour = "black"), strip.text = element_text(colour = "black"),legend.position = "none") +
stat_compare_means(label.y = 23000, label.x.npc = .3) +
scale_fill_aaas()
L75_na
largest_na = metaquast_noalign %>%
ggplot(aes(y = `Largest contig (Kbp)`, x = assembler, fill = assembler)) +
geom_boxplot(outlier.shape = NA) +
geom_point(alpha=0.2, position = position_jitterdodge(), size = 2.5) +
labs(x="Assembler",fill = "Assembler") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme_classic() +
theme(axis.text = element_text(colour = "black"), strip.text = element_text(colour = "black"),legend.position = "none") +
stat_compare_means(label.y = 125, label.x.npc = .3) +
scale_fill_aaas()
largest_na
length_na = metaquast_noalign %>%
ggplot(aes(y = `Length (Mbp)`, x = assembler, fill = assembler)) +
geom_boxplot(outlier.shape = NA) +
geom_point(alpha=0.2, position = position_jitterdodge(), size = 2.5) +
labs(x="Assembler",fill = "Assembler") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme_classic() +
theme(axis.text = element_text(colour = "black"), strip.text = element_text(colour = "black"),legend.position = "none") +
stat_compare_means(label.y = 30, label.x.npc = .3) +
scale_fill_aaas()
length_na
not_aligned_plots = ggarrange(N50_na,L75_na,largest_na,length_na,nrow = 1)
not_aligned_plots
combinedref_plots = ggarrange(N50_comb,L75_comb,largest_comb,length_comb,misassem_comb,mismatch_comb,indels_comb,genofra_comb,nrow = 2,ncol=4)
combinedref_plots
ref_plots = ggarrange(N50_ref,L75_ref,largest_ref,length_ref,misassem_ref,mismatch_ref,indels_ref,genofra_ref,nrow = 2,ncol=4)
ref_plots
assembly_plots = ggarrange(ref_plots,combinedref_plots,not_aligned_plots,nrow=3, labels = c("A","B","C"), heights = c(1,1,.5))
assembly_plots
setwd("/Users/karlavasco/OneDrive\ -\ Michigan\ State\ University/Manning_lab/AMR/results/QUAST")
# mm to inch
setWidth = 10
# font size in pt
setFontSize = 4
# 1 in R = 0.75pt, so 0.25pt is specified as
setLwd <- 0.25/0.75
pdf(file='metaQUAST_combref.pdf',width=setWidth,height=10,pointsize=setFontSize)
assembly_plots
dev.off()
## quartz_off_screen
## 2
setwd("/Users/karlavasco/OneDrive\ -\ Michigan\ State\ University/Manning_lab/AMR/results/QUAST")
# mm to inch
setWidth = 11
# font size in pt
setFontSize = 4
# 1 in R = 0.75pt, so 0.25pt is specified as
setLwd <- 0.25/0.75
pdf(file='metaQUAST_combref.pdf',width=setWidth,height=6,pointsize=setFontSize)
combinedref_plots
dev.off()
## quartz_off_screen
## 2